#include "opencl_source_map.hpp" 
namespace MNN { 
// The whole definition below is compiled out when MNN_OPENCL_BUFFER_CLOSED is defined.
#ifndef MNN_OPENCL_BUFFER_CLOSED
/*
 * OpenCL C source text for the `unary_buf` kernel, stored as a single C string
 * (the adjacent string literals below are concatenated by the compiler; the
 * C++ comments placed between them are not part of the string).
 *
 * The kernel text uses, but does not define, the following macros, so they
 * have to be provided from outside this string when the program is built:
 *   INPUT_TYPE       - element type of the `input` buffer
 *   OUTPUT_TYPE      - element type of the `output` buffer
 *   OPERATOR         - expression assigned to `float4 out`; presumably written
 *                      in terms of the local `float4 in` (verify at the build site)
 *   CONVERT_OUTPUT4  - conversion applied to `out` before the vector store
 * Optional switches tested inside the text:
 *   MNN_SUPPORT_FP16 - enables the cl_khr_fp16 extension pragma
 *   PACK_LEAVE       - enables the element-wise tail path for a final partial
 *                      group of 4 elements
 */
const char* unary_buf = 
"#ifdef MNN_SUPPORT_FP16\n"
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"#endif\n"
// Helper macros: the two global-size kernel parameters, and an early return
// for work-items whose ids fall outside those sizes.
"#define GLOBAL_SIZE_2_DIMS "" __private const int global_size_dim0,__private const int global_size_dim1,\n"
"#define DEAL_NON_UNIFORM_DIM2(input1, input2) "" if (input1 >= global_size_dim0 || input2 >= global_size_dim1) { "" return; "" }\n"
// gelu(): computes 0.5*in*(1+dst), where dst is a rational function of
// value = 0.79788458*(in + 0.044715*in^3), clamped to +1 for value > 5 and to
// -1 for value <= -5. This has the form of the tanh-based GELU approximation;
// the rational expression presumably stands in for tanh(value).
// The helper is not called in this text; presumably reachable via OPERATOR.
"inline float4 gelu(float4 in){\n"
" float4 value=0.79788458f*(0.044715f*in*in*in+in);\n"
" float4 x2=value*value;\n"
" float4 dst=value>(float4)5.0f ? (float4)1.0f : (value <= -(float4)5.0f ? -(float4)1.0f :\n"
" (value*(135135.0f+x2*(17325.0f+x2*(378.0f+x2))))/(135135.0f+x2*(62370.0f+x2*(3150.0f+x2*28.0f))));\n"
" return (1.0f+dst)*in*0.5f;\n"
"}\n"
// Kernel entry point. Parameters: the two global sizes (via GLOBAL_SIZE_2_DIMS),
// the input and output buffers, and `size`, the element count used by the
// PACK_LEAVE tail check.
"__kernel void unary_buf(GLOBAL_SIZE_2_DIMS\n"
" __global const INPUT_TYPE *input,\n"
" __global OUTPUT_TYPE *output,\n"
" __private const int size) {\n"
" const int x=get_global_id(0);\n"
" const int y=get_global_id(1);\n"
" DEAL_NON_UNIFORM_DIM2(x,y);\n"
// Each work-item handles the 4 consecutive elements starting at x*4.
// `y` is only used by the bounds check above.
" const int offset=x << 2;\n"
"#ifdef PACK_LEAVE\n"
// Tail path: taken when fewer than 4 elements remain from `offset`.
// Only the first `remain` lanes are loaded and stored, one scalar at a time;
// if remain <= 0 neither loop executes and nothing is written.
// NOTE(review): lanes [remain..3] of `in` are never assigned before OPERATOR
// is evaluated; their results are not stored, but confirm this is intended.
" if(offset+3 >= size){\n"
" int remain=size-offset;\n"
" float4 in;\n"
" float* in_ptr=(float*)&in;\n"
" for(int i=0; i<remain; ++i){\n"
" in_ptr[i]=(float)input[offset+i];\n"
" }\n"
" float4 out=OPERATOR;\n"
" float* out_ptr=(float*)&out;\n"
" for(int i=0; i<remain; ++i){\n"
" output[offset+i]=(OUTPUT_TYPE)out_ptr[i];\n"
" }\n"
" }else {\n"
"#endif\n"
// Full-vector path: load 4 elements, convert to float4, apply OPERATOR, and
// store 4 converted results. Without PACK_LEAVE this path runs unconditionally.
" float4 in=convert_float4(vload4(0,input+offset));\n"
" float4 out=OPERATOR;\n"
" vstore4(CONVERT_OUTPUT4(out),0,output+offset);\n"
"#ifdef PACK_LEAVE\n"
" }\n"
"#endif\n"
"}\n"
;
#endif
}
